#
# CallData.R
#
# Call in and recode data. Called from
# Analyze.R.

#
# Analysis parameters.
#
# How many signals were given for this survey?
num.signals <- 4
# Labels built from each stub ("contest1", "contest2", "selfcon") followed by
# an index running from 1 to num.signals + 1, in that stub order.
rounds <- unlist(lapply(
  c("contest1", "contest2", "selfcon"),
  function(stub.name) paste0(stub.name, seq_len(num.signals + 1))
))

#
# Data from Mechanical Turk.
#
# Read the wide and long analysis files from the indata/ directory.
wide <- fread(file.path("indata", "AnalysisData-wide.csv"))
long <- fread(file.path("indata", "AnalysisData-long.csv"))
# Replace quiz.score with its numeric coercion (in place, by reference).
set(wide, j = "quiz.score", value = as.numeric(wide[["quiz.score"]]))

#
# Merge/create weights.
#
# `.use` is not defined in this file; it is expected to be set by the
# sourcing script. "" runs unweighted; any other value merges the Pew
# weights; "-iq-hi" / "-iq-lo" additionally keep only respondents strictly
# above / below the weighted median quiz score.
library(matrixStats) # for weightedMedian().
library(Hmisc) # for wtd.quantile.
if (.use == "") {
  cat("RUNNING UNWEIGHTED ANALYSIS.\n")
  wide[, weight := 1]
} else { # if .use Pew, IQ-hi, or IQ-lo
  cat("RUNNING ANALYSIS WEIGHTED TO PEW.\n")
  # Weights raked to Pew marginals in rakeToPew.R.
  ps <- fread(file.path("indata", "PewWeights.csv"))
  # Left join: respondents absent from the weights file end up with an NA
  # weight, which is reported and dropped further down.
  wide <- merge(wide, ps, by = "resp.id", all.x = TRUE, all.y = FALSE)
  rm(ps)
}

# Optional: Run on subset by quiz score.
# The median is computed over respondents with both a quiz score and a
# weight; otherwise a single NA would make the cutoff NA and blank out every
# weight. Respondents exactly at the median get weight 0 in both subsets.
if (.use == "-iq-hi") {
  cat("RUNNING ANALYSIS ON HI-IQ ONLY.\n")
  wide[, weight := {
    complete <- !is.na(quiz.score) & !is.na(weight)
    weight * (quiz.score > weightedMedian(quiz.score[complete], weight[complete]))
  }]
} else if (.use == "-iq-lo") {
  cat("RUNNING ANALYSIS ON LO-IQ ONLY.\n")
  wide[, weight := {
    complete <- !is.na(quiz.score) & !is.na(weight)
    weight * (quiz.score < weightedMedian(quiz.score[complete], weight[complete]))
  }]
}

# Merge whatever weights applied to long.
long <- merge(long, wide[, c("resp.id", "weight"), with = FALSE],
              by = "resp.id", all.x = TRUE, all.y = FALSE)

if (.use != "") {
  # NA weights are excluded from the zero-weight flag so that any(flag) is
  # never NA (an NA condition would abort the script inside if()).
  flag <- wide[, !is.na(weight) & weight == 0]
  if (any(flag)) {
    cat("\nDropping responses from",sum(flag),"subjects assigned a weight of 0 in this analysis.\n")
  }
  if (anyNA(wide[["weight"]])) {
    cat("\nDropping responses from", sum(is.na(wide[["weight"]])),
        "subjects with a missing weight in this analysis.\n")
  }
  # Same rows as before are removed (data.table treats NA in i as FALSE);
  # the NA case is simply spelled out.
  wide <- wide[!is.na(weight) & weight != 0, ]
  long <- long[!is.na(weight) & weight != 0, ]
}

#
# Variable recodes.
#
library(car)
# Collapsed education: the first 12 characters of educ are mapped onto four
# ordered labels. Values that match none of the keys are left unchanged by
# recode(). The call is namespace-qualified so that another attached package
# exporting recode() cannot mask it.
wide[, educ2 := car::recode(
  substr(educ, 1, 12),
  paste0(
    "'Less than Hi'='01.HS or less'; ",
    "'High School '='01.HS or less'; ",
    "'Some College'='02.Some/2-yr College'; ",
    "'2-year Colle'='02.Some/2-yr College';  ",
    "'4-year Colle'='03.4-yr College/some postgrad'; ",
    "'Some postgra'='03.4-yr College/some postgrad'; ",
    "'Postgraduate'='04.Postgrad degree'"
  )
)] # make HS excluded categ
# Cross-tab of recoded vs. raw education. As a bare top-level expression it
# is only displayed interactively or under source(echo = TRUE).
wide[, table(educ2, substr(educ, 1, 12))]
# Join pid.summ from wide to long.
long <- merge(long, wide[, c("resp.id", "pid.summ"), with = FALSE],
              by = "resp.id", all.x = TRUE, all.y = FALSE)
# Party indicators (numeric 0/1; NA where pid.summ is NA).
long[, is.dem := 1 * (pid.summ == "Democrat")]
long[, is.gop := 1 * (pid.summ == "Republican")]

#
# List of facts: the three labelled "True favors Dems" come first,
# followed by the three labelled "True favors Reps".
#
# Named character vector: names are the short plotting labels, values are
# the full statement text ending in its truth marker, (TRUE) or (FALSE).
facts <- setNames(
  c(
    "In the 2012 Presidential Election, Barack Obama defeated the Republican Mitt Romney. In the nation as a whole, of all the votes cast for Obama and Romney, Romney won less than 48 percent. (FALSE)",
    "The total public debt of the United States federal government more than doubled from quarter 2 in 1981 to quarter 1 in 1989 while Ronald Reagan was president. (TRUE)",
    "From January 2001, when President Bush first took office, to January 2005, when President Bush started his second term in office, the civilian unemployment rate increased by more than 1 percentage point. (TRUE)",
    "The rate at which American women aged 15-44 had legal abortions fell more between 1980 and 1988, while Ronald Reagan was president, than between 1992 and 2000, while Bill Clinton was president. (FALSE)",
    "From 2009, when President Obama took office, to 2012, median household income adjusted for inflation in the United States fell by more than 4 percent. (TRUE)",
    "In the 2004 Presidential Election, John Kerry was defeated by George W. Bush. In the nation as a whole, of all the votes cast for Kerry and Bush, Kerry won less than 48 percent. (FALSE)"
  ),
  c(
    "Romney 2012 Vote Share\n(True favors Dems)",
    "Reagan Debt Change\n(True favors Dems)",
    "Bush Unemployment Change\n(True favors Dems)",
    "Reagan vs Clinton Abortion Change\n(True favors Reps)",
    "Obama Household Income Change\n(True favors Reps)",
    "Kerry 2004 Vote Share\n(True favors Reps)"
  )
)
# One row per fact: fact.short is the plotting label (the name in `facts`),
# fact is the full statement text.
dt.facts <- data.table(fact.short = names(facts), fact = facts)
# Attach an even shorter label, fact.short2, by joining on fact.short.
# merge() without all.x/all.y keeps only labels present in both tables.
dt.facts <- merge(
  dt.facts,
  data.table(
    fact.short = c(
      "Romney 2012 Vote Share\n(True favors Dems)",
      "Reagan Debt Change\n(True favors Dems)",
      "Bush Unemployment Change\n(True favors Dems)",
      "Reagan vs Clinton Abortion Change\n(True favors Reps)",
      "Obama Household Income Change\n(True favors Reps)",
      "Kerry 2004 Vote Share\n(True favors Reps)"
    ),
    fact.short2 = c(
      "Romney Share",
      "Reagan Debt",
      "Bush Unemploy",
      "Abortion",
      "Obama Income",
      "Kerry Share"
    )
  ),
  by = "fact.short"
)
# dems.favored is 1 when the label says a TRUE signal favors Democrats,
# 0 otherwise.
dt.facts[, dems.favored := as.numeric(grepl("favors Dems", fact.short, fixed = TRUE))]
# Bring each respondent's contest facts from wide into long, so that the
# "true favors Dems" indicator can later be merged on by fact text.
# For each contest stub, take resp.id plus that contest's fact column from
# wide, rename to (resp.id, fact), and tag the rows with the stub.
contest.facts <- rbindlist(lapply(
  c("contest1", "contest2"),
  function(contest.stub) {
    one.contest <- copy(wide[, c("resp.id", paste0("fact_", contest.stub)), with = FALSE])
    setnames(one.contest, c("resp.id", "fact"))
    one.contest[, stub := contest.stub]
    one.contest
  }
))
setkey(contest.facts, resp.id, stub)
# Left join onto long; rows of long with no matching (resp.id, stub) get an
# NA fact.
long <- merge(long, contest.facts, by = c("resp.id", "stub"), all.x = TRUE, all.y = FALSE)
rm(contest.facts)
# Check merge: missing facts tabulated by stub.
long[, table(stub, is.na(fact))]

# The fact text in dt.facts still ends in the " (TRUE)" / " (FALSE)" marker
# kept for plotting. Strip it so the text can be used as the join key
# against long$fact.
set(
  dt.facts,
  j = "fact",
  value = gsub(" \\(TRUE\\)| \\(FALSE\\)", "", dt.facts[["fact"]])
)

# Left join the labels and the "true favors Dems" indicator onto long by
# fact text.
long <- merge(
  long,
  dt.facts[, .SD, .SDcols = c("fact", "fact.short", "fact.short2", "dems.favored")],
  by = "fact",
  all.x = TRUE
)
# Cross-tab of label against indicator for the non-selfcon rows.
long[stub != "selfcon", table(fact.short, dems.favored)]

#
# Identify for each contest whether the signal
# TRUE favors the subject's party or not.
# NA=non-partisan subject.
#
# Only Democrats and Republicans are assigned a value: Democrats take
# dems.favored as is, Republicans take its complement. All other rows are
# left untouched (NA when the column is newly created).
long[
  pid.summ %in% c("Democrat", "Republican"),
  true.favors.party := as.numeric(
    ifelse(pid.summ == "Democrat", dems.favored, 1 - dems.favored)
  )
]
# Tabulations of the new variable, including NAs.
long[, table(true.favors.party, stub, exclude = NULL)]
long[stub != "selfcon", table(fact.short, true.favors.party, pid.summ, exclude = NULL)]

#
# Use logit from package car rather than package boot
# to deal with 0s, 1s, and 100s. Parameter
# `logit.cert.adjust'
# determines where 0s and 100s are placed. Inherits
# argument logit.cert.adjust from sourcing file.
#
library(car)
# Adds logit.cert and logit.cert.prev to long: the logit of pr.true and
# pr.true.prev, both treated as percentages (percents = TRUE).
# logit.cert.adjust is not defined in this file; it is expected to come from
# the sourcing script. NA means "drop certain answers"; otherwise it is
# passed to car::logit() as its `adjust` argument.
if (is.na(logit.cert.adjust)) {
  cat("Setting logit version of pr.true and pr.true.prev %in% c(0, 100)
  to NA.\n")
  # adjust = 0 is passed explicitly. If adjust is omitted and the data
  # contain 0 or 100, car::logit() remaps *every* proportion to
  # (0.025, 0.975) (with a warning), which would shift the logits of the
  # interior values that are kept. With adjust = 0 the interior values get
  # the plain log(p / (1 - p)); the endpoints come out as -Inf / Inf and are
  # set to NA just below.
  long[, logit.cert := car::logit(pr.true, percents = TRUE, adjust = 0)]
  long[, logit.cert.prev := car::logit(pr.true.prev, percents = TRUE, adjust = 0)]
  long[pr.true %in% c(0, 100), logit.cert := NA]
  long[pr.true.prev %in% c(0, 100), logit.cert.prev := NA]
} else {
  cat(sprintf("Setting logit version of pr.true and pr.true.prev %%in%% c(0, 100)
  to c(logit(%1.3f), logit(%1.3f)).\n",
  logit.cert.adjust,1-logit.cert.adjust))
  # With an explicit adjust, car::logit() remaps all proportions to
  # (adjust, 1 - adjust) before transforming, so 0 and 100 land on
  # logit(adjust) and logit(1 - adjust).
  long[, logit.cert := car::logit(pr.true, percents = TRUE, adjust = logit.cert.adjust)]
  long[, logit.cert.prev := car::logit(pr.true.prev, percents = TRUE, adjust = logit.cert.adjust)]
}
